<!doctype html>



  


<html class="theme-next mist use-motion" lang="en">
<head>
  <!-- hexo-inject:begin --><!-- hexo-inject:end --><meta charset="UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1"/>



<meta http-equiv="Cache-Control" content="no-transform" />
<meta http-equiv="Cache-Control" content="no-siteapp" />












  
  
  <link href="/lib/fancybox/source/jquery.fancybox.css?v=2.1.5" rel="stylesheet" type="text/css" />




  
  
  
  

  
    
    
  

  

  

  

  

  
    
    
    <link href="//fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic&subset=latin,latin-ext" rel="stylesheet" type="text/css">
  






<link href="/lib/font-awesome/css/font-awesome.min.css?v=4.6.2" rel="stylesheet" type="text/css" />

<link href="/css/main.css?v=5.1.0" rel="stylesheet" type="text/css" />


  <meta name="keywords" content="Hadoop," />








  <link rel="shortcut icon" type="image/x-icon" href="/favicon.ico?v=5.1.0" />






<meta property="og:type" content="article">
<meta property="og:title" content="Hadoop Ecosystem">
<meta property="og:url" content="https://annashuo.github.io/2017/04/27/Ecosystem/index.html">
<meta property="og:site_name" content="Anna's Blog">
<meta property="og:image" content="https://github.com/Annashuo/hadoop_project/blob/master/hadoop_ecosystem.png?raw=true">
<meta property="og:updated_time" content="2017-04-27T09:20:12.000Z">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="Hadoop Ecosystem">
<meta name="twitter:image" content="https://github.com/Annashuo/hadoop_project/blob/master/hadoop_ecosystem.png?raw=true">



<script type="text/javascript" id="hexo.configurations">
  var NexT = window.NexT || {};
  var CONFIG = {
    root: '/',
    scheme: 'Mist',
    sidebar: {"position":"right","display":"post"},
    fancybox: true,
    motion: true,
    duoshuo: {
      userId: '0',
      author: 'Author'
    },
    algolia: {
      applicationID: '',
      apiKey: '',
      indexName: '',
      hits: {"per_page":10},
      labels: {"input_placeholder":"Search for Posts","hits_empty":"We didn't find any results for the search: ${query}","hits_stats":"${hits} results found in ${time} ms"}
    }
  };
</script>



  <link rel="canonical" href="https://annashuo.github.io/2017/04/27/Ecosystem/"/>





  <title> Hadoop Ecosystem | Anna's Blog </title><!-- hexo-inject:begin --><!-- hexo-inject:end -->
</head>

<body itemscope itemtype="http://schema.org/WebPage" lang="en">

  
<!-- hexo-inject:begin --><!-- hexo-inject:end --><script>
  window.fbAsyncInit = function() {
    FB.init({
      appId      : '',
      xfbml      : true,
      version    : 'v2.6'
    });
  };

  (function(d, s, id){
     var js, fjs = d.getElementsByTagName(s)[0];
     if (d.getElementById(id)) {return;}
     js = d.createElement(s); js.id = id;
     js.src = "//connect.facebook.net/en/sdk.js";
     fjs.parentNode.insertBefore(js, fjs);
   }(document, 'script', 'facebook-jssdk'));
</script>



<script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
            (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
          m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
  ga('create', 'UA-105321506-1', 'auto');
  ga('send', 'pageview');
</script>


  <script type="text/javascript">
    var _hmt = _hmt || [];
    (function() {
      var hm = document.createElement("script");
      hm.src = "https://hm.baidu.com/hm.js?6f5ab2ef1e021070500f7228c830f3c6";
      var s = document.getElementsByTagName("script")[0];
      s.parentNode.insertBefore(hm, s);
    })();
  </script>








  
  
    
  

  <div class="container one-collumn sidebar-position-right page-post-detail ">
    <div class="headband"></div>

    <header id="header" class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-meta ">
  

  <div class="custom-logo-site-title">
    <a href="/"  class="brand" rel="start">
      <span class="logo-line-before"><i></i></span>
      <span class="site-title">Anna's Blog</span>
      <span class="logo-line-after"><i></i></span>
    </a>
  </div>
    
      <p class="site-subtitle">My heart is in the work...</p>
    
</div>

<div class="site-nav-toggle">
  <button>
    <span class="btn-bar"></span>
    <span class="btn-bar"></span>
    <span class="btn-bar"></span>
  </button>
</div>

<nav class="site-nav">
  

  
    <ul id="menu" class="menu">
      
        
        <li class="menu-item menu-item-home">
          <a href="/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-home"></i> <br />
            
            Home
          </a>
        </li>
      
        
        <li class="menu-item menu-item-categories">
          <a href="/categories" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-th"></i> <br />
            
            Categories
          </a>
        </li>
      
        
        <li class="menu-item menu-item-archives">
          <a href="/archives" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-archive"></i> <br />
            
            Archives
          </a>
        </li>
      
        
        <li class="menu-item menu-item-tags">
          <a href="/tags" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-tags"></i> <br />
            
            Tags
          </a>
        </li>
      

      
    </ul>
  

  
</nav>



 </div>
    </header>

    <main id="main" class="main">
      <div class="main-inner">
        <div class="content-wrap">
          <div id="content" class="content">
            

  <div id="posts" class="posts-expand">
    

  

  
  
  

  <article class="post post-type-normal " itemscope itemtype="http://schema.org/Article">
  <link itemprop="mainEntityOfPage" href="https://annashuo.github.io/2017/04/27/Ecosystem/">

  <span style="display:none" itemprop="author" itemscope itemtype="http://schema.org/Person">
    <meta itemprop="name" content="Anna">
    <meta itemprop="description" content="">
    <meta itemprop="image" content="/images/avatar.gif">
  </span>

  <span style="display:none" itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
    <meta itemprop="name" content="Anna's Blog">
    <span style="display:none" itemprop="logo" itemscope itemtype="http://schema.org/ImageObject">
      <img style="display:none;" itemprop="url image" alt="Anna's Blog" src="">
    </span>
  </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">
            
            
              
                Hadoop Ecosystem
              
            
          </h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">Posted on</span>
              
              <time title="Post created" itemprop="dateCreated datePublished" datetime="2017-04-27T17:12:18-04:00">
                2017-04-27
              </time>
            

            

            
          </span>

          
            <span class="post-category" >
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">In</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/categories/Hadoop-Project/" itemprop="url" rel="index">
                    <span itemprop="name">Hadoop Project</span>
                  </a>
                </span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/2017/04/27/Ecosystem/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count fb-comments-count" data-href="https://annashuo.github.io/2017/04/27/Ecosystem/" itemprop="commentCount">0</span> comments
                </a>
              </span>
            
          

          

          
          

          

          

        </div>
      </header>
    


    <div class="post-body" itemprop="articleBody">

      
      

      
        <p><img src="https://github.com/Annashuo/hadoop_project/blob/master/hadoop_ecosystem.png?raw=true" alt="Hadoop_Ecosystem"></p>
<a id="more"></a>
<h4 id="HDFS-Hadoop-Distributed-File-System"><a href="#HDFS-Hadoop-Distributed-File-System" class="headerlink" title="HDFS: Hadoop Distributed File System"></a>HDFS: Hadoop Distributed File System</h4><p>It allows to distribute the storage to big data across all clusters of computer. It also maintain the redundant copy of that data. If one of computer breaks down, it can recover from that backup.</p>
<h4 id="YARN-Yet-Another-Resource-Negotiator"><a href="#YARN-Yet-Another-Resource-Negotiator" class="headerlink" title="YARN: Yet Another Resource Negotiator"></a>YARN: Yet Another Resource Negotiator</h4><p>It is the data storage part of hadoop and also the data manage part of hadoop. It is a system that manage the storage of the data on your computing clusters</p>
<h4 id="MapReduce"><a href="#MapReduce" class="headerlink" title="MapReduce"></a>MapReduce</h4><p>A programming model that allows to process your data across the data clusters. Mapper has been builted to transfer your data in parallel across different computing cluster and map; Reducer is to aggregate data together.</p>
<h4 id="Pig"><a href="#Pig" class="headerlink" title="Pig"></a>Pig</h4><p>A high level API allows you to write scripts like SQL to request complex result without writing java or python.</p>
<h4 id="Hive"><a href="#Hive" class="headerlink" title="Hive"></a>Hive</h4><p>It is more directly to use SQL, making your storage system more look like a SQL database. </p>
<h4 id="Apache-Ambari"><a href="#Apache-Ambari" class="headerlink" title="Apache Ambari"></a>Apache Ambari</h4><p>It is a tool for you to have a view on have data moved through clusters or how application functioning on the data.</p>
<h4 id="MESOS"><a href="#MESOS" class="headerlink" title="MESOS"></a>MESOS</h4><p>It is also a resources negotiator like YARN.</p>
<h4 id="Spark"><a href="#Spark" class="headerlink" title="Spark"></a>Spark</h4><p>Run query on your data. You need to write Spark script using either python or java. Spark is really fast technology. It runs efficiently and reliabily on your clusters.</p>
<h4 id="TEZ"><a href="#TEZ" class="headerlink" title="TEZ"></a>TEZ</h4><p>It often use with hive to accelerate the programing running. </p>
<h4 id="HBASE"><a href="#HBASE" class="headerlink" title="HBASE"></a>HBASE</h4><p>A No-SQL database. It exposes your data to transfer from an platform. It is a very fast database technology to transfer data from different clusters or platforms.</p>
<h4 id="STORM"><a href="#STORM" class="headerlink" title="STORM"></a>STORM</h4><p>A way for processing streaming data quickly.</p>
<h4 id="OOZIE"><a href="#OOZIE" class="headerlink" title="OOZIE"></a>OOZIE</h4><p>A way as scheduling jobs on your clusters. There may be different steps and using different system in hadoop. OOZIE is a way to schedule all these things as jobs.</p>
<h4 id="ZooKeeper"><a href="#ZooKeeper" class="headerlink" title="ZooKeeper"></a>ZooKeeper</h4><p>It is basically a technology to coordinate all these technologies as nodes. It can track which node is up and which node is down.</p>
<h4 id="Data-Ingestion"><a href="#Data-Ingestion" class="headerlink" title="Data Ingestion"></a>Data Ingestion</h4><p>transform your own database or web server data into a HDFS.</p>

      
    </div>

    <div>
      
        

      
    </div>

    <div>
      
        

      
    </div>


    <footer class="post-footer">
      
        <div class="post-tags">
          
            <a href="/tags/Hadoop/" rel="tag"># Hadoop</a>
          
        </div>
      

      
        <div class="post-nav">
          <div class="post-nav-next post-nav-item">
            
              <a href="/2017/04/10/csapp-chapter8/" rel="next" title="csapp-chapter8">
                <i class="fa fa-chevron-left"></i> csapp-chapter8
              </a>
            
          </div>

          <span class="post-nav-divider"></span>

          <div class="post-nav-prev post-nav-item">
            
              <a href="/2017/05/02/recommendation-sequential/" rel="prev" title="recommendation-sequential">
                recommendation-sequential <i class="fa fa-chevron-right"></i>
              </a>
            
          </div>
        </div>
      

      
      
    </footer>
  </article>



    <div class="post-spread">
      
    </div>
  </div>

          
          </div>
          


          
  <div class="comments" id="comments">
    
      <div class="fb-comments"
           data-href="https://annashuo.github.io/2017/04/27/Ecosystem/"
           data-numposts="10"
           data-width="100%"
           data-colorscheme="light">
      </div>
    
  </div>


        </div>
        
          
  
  <div class="sidebar-toggle">
    <div class="sidebar-toggle-line-wrap">
      <span class="sidebar-toggle-line sidebar-toggle-line-first"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-middle"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-last"></span>
    </div>
  </div>

  <aside id="sidebar" class="sidebar">
    <div class="sidebar-inner">

      

      
        <ul class="sidebar-nav motion-element">
          <li class="sidebar-nav-toc sidebar-nav-active" data-target="post-toc-wrap" >
            Table of Contents
          </li>
          <li class="sidebar-nav-overview" data-target="site-overview">
            Overview
          </li>
        </ul>
      

      <section class="site-overview sidebar-panel">
        <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
          <img class="site-author-image" itemprop="image"
               src="/images/avatar.gif"
               alt="Anna" />
          <p class="site-author-name" itemprop="name">Anna</p>
          <p class="site-description motion-element" itemprop="description">My heart is in the work...</p>
        </div>
        <nav class="site-state motion-element">
        
          
            <div class="site-state-item site-state-posts">
              <a href="/archives">
                <span class="site-state-item-count">43</span>
                <span class="site-state-item-name">posts</span>
              </a>
            </div>
          

          
            <div class="site-state-item site-state-categories">
              <a href="/categories">
                <span class="site-state-item-count">6</span>
                <span class="site-state-item-name">categories</span>
              </a>
            </div>
          

          
            <div class="site-state-item site-state-tags">
              <a href="/tags">
                <span class="site-state-item-count">12</span>
                <span class="site-state-item-name">tags</span>
              </a>
            </div>
          

        </nav>

        

        <div class="links-of-author motion-element">
          
            
              <span class="links-of-author-item">
                <a href="https://github.com/Annashuo/Annashuo.github.io" target="_blank" title="GitHub">
                  
                    <i class="fa fa-fw fa-github"></i>
                  
                  GitHub
                </a>
              </span>
            
              <span class="links-of-author-item">
                <a href="http://weibo.com/u/1861044584?is_all=1" target="_blank" title="Weibo">
                  
                    <i class="fa fa-fw fa-weibo"></i>
                  
                  Weibo
                </a>
              </span>
            
          
        </div>

        
        

        
        

        


      </section>

      
      <!--noindex-->
        <section class="post-toc-wrap motion-element sidebar-panel sidebar-panel-active">
          <div class="post-toc">

            
              
            

            
              <div class="post-toc-content"><ol class="nav"><li class="nav-item nav-level-4"><a class="nav-link" href="#HDFS-Hadoop-Distributed-File-System"><span class="nav-number">1.</span> <span class="nav-text">HDFS: Hadoop Distributed File System</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#YARN-Yet-Another-Resource-Negotiator"><span class="nav-number">2.</span> <span class="nav-text">YARN: Yet Another Resource Negotiator</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#MapReduce"><span class="nav-number">3.</span> <span class="nav-text">MapReduce</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#Pig"><span class="nav-number">4.</span> <span class="nav-text">Pig</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#Hive"><span class="nav-number">5.</span> <span class="nav-text">Hive</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#Apache-Ambari"><span class="nav-number">6.</span> <span class="nav-text">Apache Ambari</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#MESOS"><span class="nav-number">7.</span> <span class="nav-text">MESOS</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#Spark"><span class="nav-number">8.</span> <span class="nav-text">Spark</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#TEZ"><span class="nav-number">9.</span> <span class="nav-text">TEZ</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#HBASE"><span class="nav-number">10.</span> <span class="nav-text">HBASE</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#STORM"><span class="nav-number">11.</span> <span class="nav-text">STORM</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#OOZIE"><span class="nav-number">12.</span> <span class="nav-text">OOZIE</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#ZooKeeper"><span class="nav-number">13.</span> <span class="nav-text">ZooKeeper</span></a></li><li class="nav-item nav-level-4"><a class="nav-link" href="#Data-Ingestion"><span class="nav-number">14.</span> <span class="nav-text">Data Ingestion</span></a></li></ol></div>
            

          </div>
        </section>
      <!--/noindex-->
      

    </div>
  </aside>


        
      </div>
    </main>

    <footer id="footer" class="footer">
      <div class="footer-inner">
        <div class="copyright" >
  
  &copy; 
  <span itemprop="copyrightYear">2018</span>
  <span class="with-love">
    <i class="fa fa-heart"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">Anna</span>
</div>


<div class="powered-by">
  Powered by <a class="theme-link" href="https://hexo.io">Hexo</a>
</div>

<div class="theme-info">
  Theme -
  <a class="theme-link" href="https://github.com/iissnan/hexo-theme-next">
    NexT.Mist
  </a>
</div>


        

        
      </div>
    </footer>

    <div class="back-to-top">
      <i class="fa fa-arrow-up"></i>
    </div>
  </div>

  

<script type="text/javascript">
  if (Object.prototype.toString.call(window.Promise) !== '[object Function]') {
    window.Promise = null;
  }
</script>









  




  
  <script type="text/javascript" src="/lib/jquery/index.js?v=2.1.3"></script>

  
  <script type="text/javascript" src="/lib/fastclick/lib/fastclick.min.js?v=1.0.6"></script>

  
  <script type="text/javascript" src="/lib/jquery_lazyload/jquery.lazyload.js?v=1.9.7"></script>

  
  <script type="text/javascript" src="/lib/velocity/velocity.min.js?v=1.2.1"></script>

  
  <script type="text/javascript" src="/lib/velocity/velocity.ui.min.js?v=1.2.1"></script>

  
  <script type="text/javascript" src="/lib/fancybox/source/jquery.fancybox.pack.js?v=2.1.5"></script>


  


  <script type="text/javascript" src="/js/src/utils.js?v=5.1.0"></script>

  <script type="text/javascript" src="/js/src/motion.js?v=5.1.0"></script>



  
  

  
  <script type="text/javascript" src="/js/src/scrollspy.js?v=5.1.0"></script>
<script type="text/javascript" src="/js/src/post-details.js?v=5.1.0"></script>



  


  <script type="text/javascript" src="/js/src/bootstrap.js?v=5.1.0"></script>



  



  




	





  





  

  
      <!-- UY BEGIN -->
      <script type="text/javascript" src="http://v2.uyan.cc/code/uyan.js?uid="></script>
      <!-- UY END --><!-- hexo-inject:begin --><!-- Begin: Injected MathJax -->
<script type="text/x-mathjax-config">
  MathJax.Hub.Config({"tex2jax":{"inlineMath":[["$","$"],["\\(","\\)"]],"skipTags":["script","noscript","style","textarea","pre","code"],"processEscapes":true},"TeX":{"equationNumbers":{"autoNumber":"AMS"}}});
</script>

<script type="text/x-mathjax-config">
  MathJax.Hub.Queue(function() {
    var all = MathJax.Hub.getAllJax(), i;
    for(i=0; i < all.length; i += 1) {
      all[i].SourceElement().parentNode.className += ' has-jax';
    }
  });
</script>

<script type="text/javascript" src="//cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML">
</script>
<!-- End: Injected MathJax -->
<!-- hexo-inject:end -->
  




  
  

  
  


  

  

  


</body>
</html>
